# -*- coding: utf-8 -*-
import asyncio
import json
import os
import random
import time
import requests
from urllib.parse import urlparse, parse_qs
from pyppeteer import launch
from pyppeteer.network_manager import Response, Request
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.middlewares.agents.user_agents import agents
from zc_core.middlewares.proxies.proxy_facade import ProxyFacade

from ctaxccgp.simple.bak2.data_parser import parse_order_item, parse_total_page
from ctaxccgp.simple.simple_dao import SimpleDao
from ctaxccgp.simple.slider_util import slide_move
from ctaxccgp.simple import cache_util as cache

proxy = ''
proxy_facade = ProxyFacade()
# 静态资源缓存
static_cache = {}


class ItemCrawler(object):
    bind_count = 0
    max_bind_count = 2
    item_url_tpl = 'https://ctaxccgp.zcygov.cn/items/{}'
    item_api_url_tpl = 'https://ctaxccgp.zcygov.cn/front/detail/item/{}?timestamp={}'

    session = requests.Session()
    session.cookies.set('SESSION', 'ZDhiOGViYjEtNzI2NC00YjA2LWI0YzEtZWM5Mzk4ZDg5OWQz')

    broke_pagination = False
    total_page = None
    page_list = None
    dao = SimpleDao()

    def __init__(self, sku_id, *args, **kwargs):
        super(ItemCrawler, self).__init__(*args, **kwargs)
        self.sku_id = sku_id
        self.item_url = self.item_url_tpl.format(sku_id)

    async def init_context(self):
        # 初始化
        tmp_dir = 'E:/TempData/{}'.format(int(time.time()))
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        self.browser = await launch(
            headless=False,
            userDataDir=tmp_dir,
            devtools=False,
            ignoreDefaultArgs=['--enable-automation']
        )
        self.page = await self.browser.newPage()

        # 参数设置
        await self.page.setViewport({"width": 1440, "height": 1080})
        agent = random.choice(agents)
        await self.page.setUserAgent(agent)
        await self.page.evaluate('''() => {Object.defineProperty(navigator, 'webdriver', {get: () => undefined})}''')

        # 注册拦截器
        await self.page.setRequestInterception(True)
        self.page.on("request", self.request_interceptor)
        self.page.on("response", self.response_interceptor)

    async def destroy_context(self):
        # 关闭浏览器
        await self.page.close()
        await self.browser.close()
        return

    async def run(self):
        sku_status = await self.check_sku_status()
        if not sku_status:
            return

        # 初始化浏览器
        await self.init_context()
        # 打开/刷新商品详情页面
        await self.page.goto(self.item_url)
        await self.reload_page()
        # 点击分页
        await self.get_next_page()
        # 关闭浏览器
        await self.destroy_context()

    # 商品详情接口
    async def check_sku_status(self):
        rs = self.session.get(
            url=self.item_api_url_tpl.format(self.sku_id, int(time.time())),
            headers={
                'Connection': 'keep-alive',
                'Cache-Control': 'max-age=0',
                'Upgrade-Insecure-Requests': '1',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Referer': 'https://ctaxccgp.zcygov.cn/',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'zh-CN,zh;q=0.8',
            },
            timeout=30,
            # proxies={
            #     "http": "http://{}".format(proxy),
            #     "https": "https://{}".format(proxy),
            # },
        )
        if rs:
            rs_json = json.loads(rs.content)
            print('商品: %s' % rs_json)
            return rs_json.get('success', True)

        print('下架: %s' % self.sku_id)
        return False

    # 刷新页面
    async def reload_page(self):
        try:
            await self.page.reload()
            await self.page.waitFor(random.randint(500, 1200))
            await self.page.waitForSelector('div#tab-dealrecord')
            await self.page.click('div#tab-dealrecord')
            await self.page.waitFor(random.randint(1200, 1800))
            # 清理bind记录
            self.bind_count = 0
        except Exception:
            # 清理老环境
            await self.destroy_context()
            await asyncio.sleep(1.2)
            # 初始化浏览器
            await self.init_context()
            # 打开/刷新商品详情页面
            await self.page.goto(self.item_url)
            await asyncio.sleep(1.5)
            await self.reload_page()

    # 检查/处理滑块
    async def check_slider(self):
        slider = await self.page.querySelector('#nc_nvc_wrapper')
        if slider:
            is_hidden = await self.page.evaluate(
                '()=>{return window.getComputedStyle(document.getElementById("nc_nvc_wrapper")).display === "none";}')
            if not is_hidden:
                return await slide_move(self.page)

    async def get_next_page(self):
        try:
            if self.page_list is not None:
                while len(self.page_list) and not self.broke_pagination:
                    curr_page = self.page_list.pop()
                    await self.get_page(curr_page)
            else:
                await self.check_slider()
                await asyncio.sleep(3)
                await self.get_next_page()
        except Exception:
            # await self.check_slider()
            await self.page.waitFor(random.randint(500, 1500))
            await self.reload_page()

    async def get_page(self, page_no):
        try:
            await self.check_slider()
            await self.page.waitFor(random.randint(1000, 1800))
            await self.page.waitForSelector('div.po-pagination input.po-input__inner'),
            await self.page.evaluate('document.querySelector("div.po-pagination input.po-input__inner").value=""')
            await self.page.type('div.po-pagination input.po-input__inner', str(page_no))
            await self.page.waitFor(random.randint(300, 500))
            await self.page.click('div.po-pagination button.btn-go')
            await asyncio.sleep(random.randint(1200, 2200) / 1000)
        except Exception:
            # await self.check_slider()
            await self.page.waitFor(random.randint(500, 1500))
            await self.reload_page()

    async def response_interceptor(self, response: Response):
        if "front/detail/item/dealRecord" in response.url:
            url_params = parse_qs(urlparse(response.url).query)
            page_no = url_params['pageNo'][0]
            sku_id = url_params['itemId'][0]
            content = await response.text()
            json_data = json.loads(content)
            if json_data and json_data.get('success'):
                if self.total_page is None:
                    self.total_page = parse_total_page(json_data)
                    self.page_list = list(range(2, self.total_page + 1))
                    self.page_list.reverse()
                    print('页数: sku={}, total={}'.format(page_no, self.total_page))
                order_list, no_more_page = parse_order_item(json_data, sku_id)
                self.broke_pagination = no_more_page
                await self.dao.save_orders(order_list)
                print('订单: sku={}, page={}, count={}'.format(sku_id, page_no, len(order_list)))
            else:
                rs_code = json_data.get('code', '')
                if rs_code == '403':
                    # 403直接刷新
                    return await self.reload_page()
                elif rs_code == '400':
                    # 400观察统计，超过阈值刷新
                    self.bind_count = self.bind_count + 1
                    if self.bind_count > self.max_bind_count:
                        return await self.reload_page()

                print('Erro[{}] bind={}, page={} ==> {}'.format(rs_code, self.bind_count, page_no, json_data))

    async def request_interceptor(self, request: Request):
        # 通过类型丢弃请求
        if request.resourceType in ['image', 'websocket', 'other']:
            return await request.abort()
        # 按连接丢弃请求
        if 'galaxy/collect/push' in request.url:
            return await request.abort()

        # css加缓存
        if request.resourceType in ['stylesheet']:
            if not cache.exists(request.url):
                try:
                    response = self.session.request(
                        url=request.url,
                        method=request.method,
                        headers=request.headers,
                        data=request.postData,
                        timeout=30,
                    )
                    resp = {"body": response.content, "headers": response.headers, "status": response.status_code}
                    cache.add(request.url, resp)
                    return await request.respond(resp)
                except Exception as e:
                    return await request.abort()
            else:
                # css走缓存
                resp = cache.get(request.url)
                return await request.respond(resp)

        # 订单请求
        if 'front/detail/item/dealRecord' in request.url:
            try:
                # proxy = proxy_facade.get_proxy()
                proxy = '123.53.236.30:4225'
                # print('{} -> {}'.format(proxy, request.url))
                response = self.session.request(
                    url=request.url,
                    method=request.method,
                    headers=request.headers,
                    data=request.postData,
                    timeout=30,
                    # proxies={
                    #     "http": "http://{}".format(proxy),
                    #     "https": "https://{}".format(proxy),
                    # },
                )
                # 数据返回给浏览器
                resp = {"body": response.content, "headers": response.headers, "status": response.status_code}
                return await request.respond(resp)
            except Exception as e:
                return await request.abort()
        else:
            return await request.continue_()


async def batch_work():
    dist_list = SkuPoolDao().get_sku_pool_list()
    print('目标：%s' % (len(dist_list)))
    random.shuffle(dist_list)
    for sku in dist_list:
        await ItemCrawler(sku.get('_id')).run()


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(batch_work())
